##########################################
# Replication Data for Proksch, Lowe, Wäckerle, Soroka. (2018). Multilingual Sentiment Analysis: A New Approach to Measuring Conflict in Legislative Speeches. Legislative Studies Quarterly, Forthcoming.
##########################################

#Part 3: Wordscores Replication
#Most of this code follows the replication provided by Herzog and Benoit for the article "The Most Unkindest Cuts: Speaker Selection and Expressed Government Dissent During Economic Crisis"

rm(list = ls(all = TRUE))
library(rstudioapi)

current_path <- getActiveDocumentContext()$path 
setwd(dirname(current_path ))
library(rjags)
library(ggplot2)
library(texreg)

### PATHS ##############################
dataSub <- "3_working_data_speakers.RData"
dataSub_senti <- "3_working_data_speakers_senti.RData"

########################################

### DATA ##############################
load("3_outcome_model1_posterior.RData")
load("3_outcome_model2_posterior.RData")
load("3_outcome_model3_posterior.RData")
load("3_outcome_model1_senti_posterior.RData")
load("3_outcome_model2_senti_posterior.RData")
load("3_outcome_model3_senti_posterior.RData")
########################################

coef.names <- c(
    "Intercept",
    "Constituency unemployment",
    "Electoral safety",
    "Legislative seniority",
    "Government",
    "Cabinet member",
    "Crisis",
    "Const. unemployment x Crisis",
    "Electoral safety x Crisis",
    "Government x Crisis",
    "Const. unemployment x Government",
    "Electoral safety x Government",
    "Const. unemployment x Government x Crisis",
    "Electoral safety x Government x Crisis")

stats.names <- c("$\\mu_{\\alpha}$", "$\\sigma_{\\alpha}$", "$\\sigma$")


# Extract model coefficients and quantiles
# ----------------------------------------
model.results <- data.frame(
    variable = character(), 
    model = character(),
    coef = numeric(),
    ci.low = numeric(),
    ci.up = numeric())
for (j in 1:3) {
    s <- summary(get(paste0("pos",j,".posterior")))
    model <- paste("Model",j)

    # coefficients
    # ------------
    # intercepts and betas
    a <- mean(s$statistics[grepl("a\\[",rownames(s$statistics)),][, "Mean"])   
    b <- s$statistics[grepl("b\\[",rownames(s$statistics)),][, "Mean"]

    # mu.a
    mu.a <- s$statistics[rownames(s$statistics)=="mu.a"][1]

    # sigma.a
    sigma.a <- s$statistics[rownames(s$statistics)=="sigma.a"][1]

    # sigma
    sigma <- s$statistics[rownames(s$statistics)=="sigma"][1]
    
    # combine together
    coef <- c(a, b, mu.a, sigma.a, sigma)
    

    # credible intervals
    # ------------------
    # intercepts and betas
    ci.a <- colMeans(s$quantiles[grepl("a\\[",rownames(s$quantiles)),][, c("2.5%","97.5%")])
    ci.b <- s$quantiles[grepl("b\\[",rownames(s$quantiles)),][, c("2.5%","97.5%")]

    # mu.a
    ci.mu.a <- s$quantiles[rownames(s$statistics)=="mu.a", c("2.5%","97.5%")]
    
    # sigma.a
    ci.sigma.a <- s$quantiles[rownames(s$statistics)=="sigma.a", c("2.5%","97.5%")]
    
    # sigma
    ci.sigma <- s$quantiles[rownames(s$statistics)=="sigma", c("2.5%","97.5%")]
    
    # combine together    
    ci <- rbind(ci.a, ci.b, ci.mu.a, ci.sigma.a, ci.sigma)  
    
    # labels
    variable <- c(coef.names[1:(length(b)+1)], stats.names)

    # add to data frame
    d <- data.frame(model, variable, coef, ci.low=ci[,1], ci.up=ci[,2])
    
    model.results <- rbind(model.results, d)
}

model.results$type <- ""
model.results$type[model.results$variable %in% stats.names] <- "stat"
model.results$type[!(model.results$variable %in% stats.names)] <- "beta"

# factor ordering of variables for plots
n <- length(coef.names)
model.results$variable <- factor(model.results$variable, levels=c(stats.names, coef.names[order(seq(1,n,1), decreasing=TRUE)]))


# Plot coefficients
# -----------------
d <- subset(model.results, model.results$type=="beta")
d <- d[!(d$variable=="Intercept"),]
p <- ggplot(data=d) +
    theme_bw() +ggtitle("Wordscores Models")+
    facet_wrap(~ model) +
    geom_pointrange(
        aes(x=variable, y=coef, ymin=ci.low, ymax=ci.up),
        lwd = 1/2, position = position_dodge(width = 1/2),
        shape = 21, fill = "black") +
    geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
    coord_flip() +
    xlab("") +
    ylab("Coefficients and 95% credible intervals") +
    theme(text = element_text(size=16)) +
    theme(axis.title.x = element_text(size=12, vjust=-0.5))


###########################################
# Sentiment
###########################################

# Extract model coefficients and quantiles
# ----------------------------------------
model.senti.results <- data.frame(
  variable = character(), 
  model = character(),
  coef = numeric(),
  ci.low = numeric(),
  ci.up = numeric())
j=1
for (j in 1:3) {
  s <- summary(get(paste0("pos",j,".senti.posterior")))
  model <- paste("Model Senti",j)
  
  # coefficients
  # ------------
  # intercepts and betas
  a <- mean(s$statistics[grepl("a\\[",rownames(s$statistics)),][, "Mean"])   
  b <- s$statistics[grepl("b\\[",rownames(s$statistics)),][, "Mean"]
  
  # mu.a
  mu.a <- s$statistics[rownames(s$statistics)=="mu.a"][1]
  
  # sigma.a
  sigma.a <- s$statistics[rownames(s$statistics)=="sigma.a"][1]
  
  # sigma
  sigma <- s$statistics[rownames(s$statistics)=="sigma"][1]
  
  # combine together
  coef <- c(a, b, mu.a, sigma.a, sigma)
  
  
  # credible intervals
  # ------------------
  # intercepts and betas
  ci.a <- colMeans(s$quantiles[grepl("a\\[",rownames(s$quantiles)),][, c("2.5%","97.5%")])
  ci.b <- s$quantiles[grepl("b\\[",rownames(s$quantiles)),][, c("2.5%","97.5%")]
  
  # mu.a
  ci.mu.a <- s$quantiles[rownames(s$statistics)=="mu.a", c("2.5%","97.5%")]
  
  # sigma.a
  ci.sigma.a <- s$quantiles[rownames(s$statistics)=="sigma.a", c("2.5%","97.5%")]
  
  # sigma
  ci.sigma <- s$quantiles[rownames(s$statistics)=="sigma", c("2.5%","97.5%")]
  
  # combine together    
  ci <- rbind(ci.a, ci.b, ci.mu.a, ci.sigma.a, ci.sigma)  
  
  # labels
  variable <- c(coef.names[1:(length(b)+1)], stats.names)
  
  # add to data frame
  d <- data.frame(model, variable, coef, ci.low=ci[,1], ci.up=ci[,2])
  
  model.senti.results <- rbind(model.senti.results, d)
}

model.senti.results$type <- ""
model.senti.results$type[model.senti.results$variable %in% stats.names] <- "stat"
model.senti.results$type[!(model.senti.results$variable %in% stats.names)] <- "beta"

# factor ordering of variables for plots
n <- length(coef.names)
model.senti.results$variable <- factor(model.senti.results$variable, levels=c(stats.names, coef.names[order(seq(1,n,1), decreasing=TRUE)]))


# Plot coefficients
# -----------------
d <- subset(model.senti.results, model.senti.results$type=="beta")
d <- d[!(d$variable=="Intercept"),]
p.senti <- ggplot(data=d) +
  theme_bw() +ggtitle("Sentiment Models")+
  facet_wrap(~ model) +
  geom_pointrange(
    aes(x=variable, y=coef, ymin=ci.low, ymax=ci.up),
    lwd = 1/2, position = position_dodge(width = 1/2),
    shape = 21, fill = "black") +
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  coord_flip() +
  xlab("") +
  ylab("Coefficients and 95% credible intervals") +
  theme(text = element_text(size=16)) +
  theme(axis.title.x = element_text(size=12, vjust=-0.5))



# Generate tables
# ---------------
load(dataSub)

N <- nrow(dataSub)
N.groups <- length(unique(dataSub$m))

pos1 <- with(model.results[model.results$model=="Model 1",],
             createTexreg(
               coef.names=as.character(variable),
               coef=coef,
               ci.low=ci.low,
               ci.up=ci.up,
               gof=c(N, N.groups),
               gof.names=c("N obs.", "N legislators"),
               gof.decimal=c(FALSE, FALSE))
)

pos2 <- with(model.results[model.results$model=="Model 2",],
             createTexreg(
               coef.names=as.character(variable),
               coef=coef,
               ci.low=ci.low,
               ci.up=ci.up,
               gof=c(N, N.groups),
               gof.names=c("N obs.", "N legislators"),
               gof.decimal=c(FALSE, FALSE))
)

pos3 <- with(model.results[model.results$model=="Model 3",],
             createTexreg(
               coef.names=as.character(variable),
               coef=coef,
               ci.low=ci.low,
               ci.up=ci.up,
               gof=c(N, N.groups),
               gof.names=c("N obs.", "N legislators"),
               gof.decimal=c(FALSE, FALSE))
)


#texreg(
#    list(pos1, pos2, pos3),
#    custom.model.names = c("Model 1","Model 2", "Model 3"),
#    reorder.coef = c(1,2,3,4,5,6,7,11,12,13,14,15,16,17,8,9,10),
#    digits=2,
#    single.row=FALSE,
#   leading.zero=TRUE,
#    caption.above=TRUE,
#    booktabs=TRUE,
#    bold=0.05,
#    use.packages=FALSE,
#    table=FALSE,
#    stars=0,
#    ci.test = NULL,
#    ci.force=TRUE,
#    center=TRUE,
#    file="tables/table_position_taking.tex"
#)


# Generate Sentiment tables
# ---------------
load(dataSub)

N <- nrow(dataSub)
N.groups <- length(unique(dataSub$m))

pos1_senti <- with(model.senti.results[model.senti.results$model=="Model Senti 1",],
             createTexreg(
               coef.names=as.character(variable),
               coef=coef,
               ci.low=ci.low,
               ci.up=ci.up,
               gof=c(N, N.groups),
               gof.names=c("N obs.", "N legislators"),
               gof.decimal=c(FALSE, FALSE))
)

pos2_senti <- with(model.senti.results[model.senti.results$model=="Model Senti 2",],
             createTexreg(
               coef.names=as.character(variable),
               coef=coef,
               ci.low=ci.low,
               ci.up=ci.up,
               gof=c(N, N.groups),
               gof.names=c("N obs.", "N legislators"),
               gof.decimal=c(FALSE, FALSE))
)

pos3_senti <- with(model.senti.results[model.senti.results$model=="Model Senti 3",],
             createTexreg(
               coef.names=as.character(variable),
               coef=coef,
               ci.low=ci.low,
               ci.up=ci.up,
               gof=c(N, N.groups),
               gof.names=c("N obs.", "N legislators"),
               gof.decimal=c(FALSE, FALSE))
)


#texreg(
#  list(pos1_senti, pos2_senti, pos3_senti),
#  custom.model.names = c("Model 1","Model 2", "Model 3"),
#  reorder.coef = c(1,2,3,4,5,6,7,11,12,13,14,15,16,17,8,9,10),
#  digits=2,
#  single.row=FALSE,
#  leading.zero=TRUE,
#  caption.above=TRUE,
#  booktabs=TRUE,
#  bold=0.05,
#  use.packages=FALSE,
#  table=FALSE,
#  stars=0,
#  ci.test = NULL,
#  ci.force=TRUE,
#  center=TRUE,
#  file="tables/table_position_taking_senti.tex"
#)



